# twoPartyVote.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file loads and codes data on the two-party vote. It creates a data 
# frame called "twoPartyVote". The main variables in the data frame are 
# state.2L, YEAR, and Dem.2p. 


library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))  # for qw()
library(car)      # for Recode()
library(zoo)      # for na.approx()
# Build the table of two-party vote shares by state and presidential-election
# year. The steps run inside local() so that the working copy ("votes") does
# not linger in the calling environment; only twoPartyVote is created.
twoPartyVote <- local({
  votes <- read.csv('data/Presidential Vote by State 1912-2008.csv')
  votes <- rename(votes, Dem = "DEM.", Rep = "REP.")

  # Each major party's share of the combined Democratic + Republican vote.
  votes <- mutate(
    votes,
    Dem.2p = Dem / (Dem + Rep),
    Rep.2p = Rep / (Dem + Rep))

  # Map full state names to two-letter abbreviations. Names that are absent
  # from state.name (such as 'D. C.') come back from the lookup as NA, so DC
  # is filled in by hand below. AK and HI are set to NA so that their rows
  # are removed by na.omit() at the end.
  nameToAbb <- setNames(state.abb, state.name)
  abbrevs   <- nameToAbb[as.character(votes$STATE)]
  votes$state.2L <- Recode(abbrevs, 'c("AK", "HI") = NA')
  votes$state.2L[votes$STATE == 'D. C.'] <- 'DC'
  votes$state.2L <- droplevels(factor(votes$state.2L))

  # Put YEAR and state.2L first, then drop the original STATE column.
  votes <- select(votes, YEAR, state.2L, everything())
  votes <- select(votes, -STATE)

  # NOTE: na.omit() removes every row that has an NA in *any* column. That
  # covers the AK and HI rows (state.2L is NA), but it would also remove any
  # other row with a missing value.
  na.omit(votes)
})


# ADD ROWS FOR OFF-ELECTION YEARS
# Years from 1912 through 2008 that are not multiples of four, i.e., the
# years between presidential elections.
yearsToAdd <- filter(tibble(year = 1912:2008), year %% 4 != 0)

# One row for every combination of off-election year and state. These rows
# carry only YEAR and state.2L, so every other column is NA after binding.
rowsToAdd <- expand.grid(
  YEAR = yearsToAdd$year, state.2L = levels(twoPartyVote$state.2L))

# Append the new rows and sort so that each state's years are contiguous and
# in chronological order.
twoPartyVote <- arrange(
  bind_rows(twoPartyVote, rowsToAdd),
  state.2L,
  YEAR)


# IMPUTE TWO-PARTY VOTE FOR NON-PRESIDENTIAL-ELECTION YEARS
# Within each state, fill the NA entries of the vote columns by linear
# interpolation between the surrounding non-missing values.
#
# * na.approx() is called without an explicit index, so it interpolates by
#   row position. This relies on the rows already being sorted by YEAR
#   within state, with one row per consecutive year.
# * na.rm = FALSE keeps leading and trailing NAs in place, so each column
#   keeps its original length.
# * The result is still grouped by state.2L; no ungroup() is applied here.
twoPartyVote <- mutate_at(
  group_by(twoPartyVote, state.2L),
  .vars = qw("Dem Rep Dem.2p Rep.2p"),
  .funs = na.approx,
  na.rm = FALSE)
